In [1]:
from util import *
from glob import glob
import matplotlib.pyplot as plt
from shapely import wkt
pd.set_option("display.max_columns", None)
In [2]:
gdf = gpd.read_file(f"Manuwatu_Test/transects_intersects_20240315_120828.geojson")
gdf["Date"] = pd.to_datetime(gdf.ShorelineID, dayfirst=True, format='mixed')
gdf["Year"] = gdf.Date.dt.year
gdf["YearsSinceBase"] = (gdf.Date - pd.Timestamp(1800, 1, 1)).dt.days / 365.25
gdf["YearsUntilFuture"] = (
pd.Timestamp(2100, 1, 1) - gdf.Date
).dt.days / 365.25
gdf.Date = gdf.Date.astype(str)
gdf
Out[2]:
| TransectID | ShorelineID | BaselineID | Distance | IntersectX | IntersectY | Uncertainty | geometry | Date | Year | YearsSinceBase | YearsUntilFuture | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 1 | 05/12/2020 | 0 | -30.72 | 1.664823e+06 | 5.652036e+06 | 10.00 | POINT (1664822.752 5652035.621) | 2020-12-05 | 2020 | 220.922656 | 79.071869 |
| 1 | 1 | 11/02/2017 | 0 | -29.69 | 1.664822e+06 | 5.652035e+06 | 10.00 | POINT (1664821.739 5652035.474) | 2017-02-11 | 2017 | 217.108830 | 82.885695 |
| 2 | 1 | 08/11/1970 | 0 | -27.86 | 1.664820e+06 | 5.652035e+06 | 2.63 | POINT (1664819.922 5652035.211) | 1970-11-08 | 1970 | 170.847365 | 129.147159 |
| 3 | 1 | 19/09/1955 | 0 | -25.63 | 1.664818e+06 | 5.652035e+06 | 2.68 | POINT (1664817.715 5652034.890) | 1955-09-19 | 1955 | 155.709788 | 144.284736 |
| 4 | 2 | 05/12/2020 | 0 | -31.94 | 1.664830e+06 | 5.651936e+06 | 10.00 | POINT (1664830.066 5651936.243) | 2020-12-05 | 2020 | 220.922656 | 79.071869 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 9214 | 1925 | 23/01/2016 | 244 | -39.52 | 1.904293e+06 | 5.511354e+06 | 1.02 | POINT (1904293.154 5511353.967) | 2016-01-23 | 2016 | 216.054757 | 83.939767 |
| 9215 | 1925 | 31/03/1944 | 244 | -29.47 | 1.904294e+06 | 5.511344e+06 | 2.27 | POINT (1904293.505 5511343.923) | 1944-03-31 | 1944 | 144.240931 | 155.753593 |
| 9216 | 1926 | 29/04/2021 | 244 | -36.22 | 1.904393e+06 | 5.511358e+06 | 1.02 | POINT (1904392.891 5511358.332) | 2021-04-29 | 2021 | 221.319644 | 78.674880 |
| 9217 | 1926 | 23/01/2016 | 244 | -36.58 | 1.904393e+06 | 5.511359e+06 | 1.02 | POINT (1904392.878 5511358.698) | 2016-01-23 | 2016 | 216.054757 | 83.939767 |
| 9218 | 1926 | 31/03/1944 | 244 | -24.95 | 1.904393e+06 | 5.511347e+06 | 2.27 | POINT (1904393.306 5511347.071) | 1944-03-31 | 1944 | 144.240931 | 155.753593 |
9219 rows × 12 columns
In [3]:
lines = gpd.read_file("Manuwatu_Test/t10_transects_azi.shp")
lines
Out[3]:
| ObjectID_1 | BaselineID | TransOrder | TransEdit | Azimuth | geometry | |
|---|---|---|---|---|---|---|
| 0 | 1.0 | 0.0 | 1.0 | 0.0 | 81.743293 | LINESTRING Z (1664792.355 5652031.210 0.000, 1... |
| 1 | 2.0 | 0.0 | 2.0 | 0.0 | 81.593561 | LINESTRING Z (1664798.470 5651931.574 0.000, 1... |
| 2 | 3.0 | 0.0 | 3.0 | 0.0 | 80.678646 | LINESTRING Z (1664800.863 5651831.613 0.000, 1... |
| 3 | 4.0 | 0.0 | 4.0 | 0.0 | 80.416187 | LINESTRING Z (1664809.739 5651732.147 0.000, 1... |
| 4 | 5.0 | 0.0 | 5.0 | 0.0 | 77.954781 | LINESTRING Z (1664832.184 5651634.801 0.000, 1... |
| ... | ... | ... | ... | ... | ... | ... |
| 1889 | 1924.0 | 244.0 | 1924.0 | 0.0 | 358.475284 | LINESTRING Z (1904194.624 5511311.003 0.000, 1... |
| 1890 | 1925.0 | 244.0 | 1925.0 | 0.0 | 357.996994 | LINESTRING Z (1904294.535 5511314.467 0.000, 1... |
| 1891 | 1926.0 | 244.0 | 1926.0 | 0.0 | 357.892961 | LINESTRING Z (1904394.223 5511322.139 0.000, 1... |
| 1892 | 1927.0 | 245.0 | 1927.0 | 0.0 | 343.686148 | LINESTRING Z (1787772.153 5517489.901 0.000, 1... |
| 1893 | 1928.0 | 245.0 | 1928.0 | 0.0 | 343.686148 | LINESTRING Z (1787867.064 5517520.834 0.000, 1... |
1894 rows × 6 columns
In [4]:
def get_azimuth(line):
p1, p2 = line.coords[1:]
azimuth = math.degrees(math.atan2(p2[0]-p1[0], p2[1]-p1[1]))
if azimuth < 0:
azimuth += 360
return azimuth
lines["calculated_azimuth"] = lines.geometry.apply(get_azimuth)
lines
Out[4]:
| ObjectID_1 | BaselineID | TransOrder | TransEdit | Azimuth | geometry | calculated_azimuth | |
|---|---|---|---|---|---|---|---|
| 0 | 1.0 | 0.0 | 1.0 | 0.0 | 81.743293 | LINESTRING Z (1664792.355 5652031.210 0.000, 1... | 81.743293 |
| 1 | 2.0 | 0.0 | 2.0 | 0.0 | 81.593561 | LINESTRING Z (1664798.470 5651931.574 0.000, 1... | 81.593561 |
| 2 | 3.0 | 0.0 | 3.0 | 0.0 | 80.678646 | LINESTRING Z (1664800.863 5651831.613 0.000, 1... | 80.678646 |
| 3 | 4.0 | 0.0 | 4.0 | 0.0 | 80.416187 | LINESTRING Z (1664809.739 5651732.147 0.000, 1... | 80.416187 |
| 4 | 5.0 | 0.0 | 5.0 | 0.0 | 77.954781 | LINESTRING Z (1664832.184 5651634.801 0.000, 1... | 77.954781 |
| ... | ... | ... | ... | ... | ... | ... | ... |
| 1889 | 1924.0 | 244.0 | 1924.0 | 0.0 | 358.475284 | LINESTRING Z (1904194.624 5511311.003 0.000, 1... | 358.475284 |
| 1890 | 1925.0 | 244.0 | 1925.0 | 0.0 | 357.996994 | LINESTRING Z (1904294.535 5511314.467 0.000, 1... | 357.996994 |
| 1891 | 1926.0 | 244.0 | 1926.0 | 0.0 | 357.892961 | LINESTRING Z (1904394.223 5511322.139 0.000, 1... | 357.892961 |
| 1892 | 1927.0 | 245.0 | 1927.0 | 0.0 | 343.686148 | LINESTRING Z (1787772.153 5517489.901 0.000, 1... | 343.686148 |
| 1893 | 1928.0 | 245.0 | 1928.0 | 0.0 | 343.686148 | LINESTRING Z (1787867.064 5517520.834 0.000, 1... | 343.686148 |
1894 rows × 7 columns
In [5]:
gdf.Year.describe()
Out[5]:
count 9219.000000 mean 1990.560581 std 28.088717 min 1939.000000 25% 1967.000000 50% 1995.000000 75% 2017.000000 max 2022.000000 Name: Year, dtype: float64
In [6]:
gdf.Distance.describe()
Out[6]:
count 9219.000000 mean -61.999667 std 69.108880 min -1914.190000 25% -74.220000 50% -39.960000 75% -25.395000 max -4.770000 Name: Distance, dtype: float64
In [7]:
transect_metadata = get_transect_metadata(f"Manuwatu_Test/t10_transects_azi.shp")
In [8]:
#win_type='gaussian' allows weighting to occur in the rolling slope function
#####BUG IN THE CODE THAT DOES NOT ALLOW WEIGHTING TO OCCUR#####
linear_models = fit(gdf, transect_metadata)
rolled_slopes = linear_models.groupby("group").slope.rolling(10, min_periods=1, win_type="gaussian").mean().dropna().reset_index(level=0)
linear_models.slope = rolled_slopes.slope
linear_models.dropna(inplace=True)
linear_models
c:\Users\lalit\anaconda3\envs\environment\lib\site-packages\sklearn\metrics\_regression.py:996: UndefinedMetricWarning: R^2 score is not well-defined with less than two samples. warnings.warn(msg, UndefinedMetricWarning) c:\Users\lalit\anaconda3\envs\environment\lib\site-packages\sklearn\metrics\_regression.py:996: UndefinedMetricWarning: R^2 score is not well-defined with less than two samples. warnings.warn(msg, UndefinedMetricWarning) c:\Users\lalit\anaconda3\envs\environment\lib\site-packages\sklearn\metrics\_regression.py:996: UndefinedMetricWarning: R^2 score is not well-defined with less than two samples. warnings.warn(msg, UndefinedMetricWarning)
Out[8]:
| TransectID | slope | intercept | group | r2_score | mae | mse | rmse | |
|---|---|---|---|---|---|---|---|---|
| 0 | 1 | -0.065571 | -15.941371 | 0 | 0.926099 | 0.504336 | 0.276915 | 0.526227 |
| 1 | 2 | -0.078177 | -11.259434 | 0 | 0.957393 | 0.507380 | 0.296034 | 0.544090 |
| 2 | 3 | -0.070524 | -15.562882 | 0 | 0.589842 | 1.023301 | 1.711207 | 1.308131 |
| 3 | 4 | -0.066901 | -15.276985 | 0 | 0.686399 | 0.931125 | 1.157774 | 1.075999 |
| 4 | 5 | -0.069303 | -13.214268 | 0 | 0.760956 | 1.230886 | 1.578735 | 1.256477 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 1885 | 1923 | 0.225420 | -28.613747 | 163 | 0.026739 | 1.182115 | 1.962700 | 1.400964 |
| 1886 | 1924 | 0.106620 | -16.956773 | 163 | 0.999680 | 0.052252 | 0.003835 | 0.061926 |
| 1887 | 1925 | 0.026290 | -10.756660 | 163 | 0.986967 | 0.443378 | 0.276111 | 0.525462 |
| 1888 | 1926 | -0.012694 | -2.926398 | 163 | 0.992207 | 0.402196 | 0.227201 | 0.476656 |
| 1889 | 1928 | -1.901856 | -199.126821 | 164 | 0.996629 | 0.849264 | 0.882216 | 0.939263 |
1887 rows × 8 columns
In [9]:
def calculate_new_coordinates(old_x, old_y, bearing, distance):
bearing_radians = math.radians(bearing)
new_x = old_x + (distance * math.sin(bearing_radians))
new_y = old_y + (distance * math.cos(bearing_radians))
point = Point(new_x, new_y)
assert not point.is_empty
return point
def predict(
df: pd.DataFrame,
linear_models: pd.DataFrame,
transect_metadata: dict,
):
"""_summary_
Args:
df (pd.DataFrame): dataframe with columns: TransectID, Date, Distance, YearsSinceBase
linear_models (pd.DataFrame): dataframe with columns: TransectID, slope, intercept
transect_metadata (dict): dict lookup of TransectID to Azimuth & group
Returns:
pd.DataFrame: resulting prediction points for the year 2100
"""
results = []
for i, row in linear_models.iterrows():
transect_ID = row.TransectID
transect_df = df[df.TransectID == transect_ID]
latest_row = transect_df[transect_df.Date == transect_df["Date"].max()].iloc[0]
future_year = int(row.get("FUTURE_YEAR", FUTURE_YEAR))
result = row.to_dict()
result.update({
"TransectID": transect_ID,
"BaselineID": latest_row.BaselineID,
"group": row.group,
"Year": future_year,
"ocean_point": calculate_new_coordinates(
latest_row.geometry.x,
latest_row.geometry.y,
transect_metadata[transect_ID]["Azimuth"] + 180,
500,
),
})
model = "linear"
slope = row.slope
intercept = row.intercept
predicted_distance = slope * (future_year - 1800) + intercept
distance_difference = latest_row.Distance - predicted_distance
result[f"{model}_model_point"] = calculate_new_coordinates(
latest_row.geometry.x,
latest_row.geometry.y,
transect_metadata[transect_ID]["Azimuth"],
distance_difference,
)
result[f"{model}_model_predicted_distance"] = predicted_distance
result[f"{model}_model_distance"] = distance_difference
results.append(result)
results = gpd.GeoDataFrame(results)
return results
In [10]:
results = predict(gdf, linear_models, transect_metadata)
results
Out[10]:
| TransectID | slope | intercept | group | r2_score | mae | mse | rmse | BaselineID | Year | ocean_point | linear_model_point | linear_model_predicted_distance | linear_model_distance | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 1.0 | -0.065571 | -15.941371 | 0.0 | 0.926099 | 0.504336 | 0.276915 | 0.526227 | 0 | 2100 | POINT (1664327.9351324248 5651963.817190449) | POINT (1664827.5942540085 5652036.324033046) | -35.612543 | 4.892543 |
| 1 | 2.0 | -0.078177 | -11.259434 | 0.0 | 0.957393 | 0.507380 | 0.296034 | 0.544090 | 0 | 2100 | POINT (1664335.437879246 5651863.146308652) | POINT (1664832.8086359142 5651936.648743532) | -34.712590 | 2.772590 |
| 2 | 3.0 | -0.070524 | -15.562882 | 0.0 | 0.589842 | 1.023301 | 1.711207 | 1.308131 | 0 | 2100 | POINT (1664335.297331601 5651755.195818301) | POINT (1664837.1025794188 5651837.561631945) | -36.720043 | 8.520043 |
| 3 | 4.0 | -0.066901 | -15.276985 | 0.0 | 0.686399 | 0.931125 | 1.157774 | 1.075999 | 0 | 2100 | POINT (1664344.5869323318 5651653.607125445) | POINT (1664844.5968896376 5651738.032181877) | -35.347317 | 7.087317 |
| 4 | 5.0 | -0.069303 | -13.214268 | 0.0 | 0.760956 | 1.230886 | 1.578735 | 1.256477 | 0 | 2100 | POINT (1664374.3334289133 5651537.103741099) | POINT (1664865.4424388874 5651641.897360339) | -34.005075 | 2.165075 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 1882 | 1923.0 | 0.225420 | -28.613747 | 163.0 | 0.026739 | 1.182115 | 1.962700 | 1.400964 | 244 | 2100 | POINT (1904103.7288165868 5510835.704927893) | POINT (1904095.4086911152 5511271.11324593) | 39.012195 | -64.512195 |
| 1883 | 1924.0 | 0.106620 | -16.956773 | 163.0 | 0.999680 | 0.052252 | 0.003835 | 0.061926 | 244 | 2100 | POINT (1904206.894713303 5510849.992895648) | POINT (1904195.0237154006 5511295.975836613) | 15.029098 | -53.859098 |
| 1884 | 1925.0 | 0.026290 | -10.756660 | 163.0 | 0.986967 | 0.443378 | 0.276111 | 0.525462 | 244 | 2100 | POINT (1904310.650852537 5510853.672411901) | POINT (1904294.4349141892 5511317.338654199) | -2.869718 | -36.050282 |
| 1885 | 1926.0 | -0.012694 | -2.926398 | 163.0 | 0.992207 | 0.402196 | 0.227201 | 0.476656 | 244 | 2100 | POINT (1904411.2744729214 5510858.670062316) | POINT (1904393.9753056369 5511328.866490144) | -6.734550 | -29.485450 |
| 1886 | 1928.0 | -1.901856 | -199.126821 | 164.0 | 0.996629 | 0.849264 | 0.882216 | 0.939263 | 245 | 2100 | POINT (1787833.198058164 5517636.541552375) | POINT (1787650.8600291626 5518259.52981814) | -769.683668 | 149.123668 |
1887 rows × 14 columns
In [11]:
results.set_geometry("linear_model_point", inplace=True, crs=2193)
results
Out[11]:
| TransectID | slope | intercept | group | r2_score | mae | mse | rmse | BaselineID | Year | ocean_point | linear_model_point | linear_model_predicted_distance | linear_model_distance | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 1.0 | -0.065571 | -15.941371 | 0.0 | 0.926099 | 0.504336 | 0.276915 | 0.526227 | 0 | 2100 | POINT (1664327.9351324248 5651963.817190449) | POINT (1664827.594 5652036.324) | -35.612543 | 4.892543 |
| 1 | 2.0 | -0.078177 | -11.259434 | 0.0 | 0.957393 | 0.507380 | 0.296034 | 0.544090 | 0 | 2100 | POINT (1664335.437879246 5651863.146308652) | POINT (1664832.809 5651936.649) | -34.712590 | 2.772590 |
| 2 | 3.0 | -0.070524 | -15.562882 | 0.0 | 0.589842 | 1.023301 | 1.711207 | 1.308131 | 0 | 2100 | POINT (1664335.297331601 5651755.195818301) | POINT (1664837.103 5651837.562) | -36.720043 | 8.520043 |
| 3 | 4.0 | -0.066901 | -15.276985 | 0.0 | 0.686399 | 0.931125 | 1.157774 | 1.075999 | 0 | 2100 | POINT (1664344.5869323318 5651653.607125445) | POINT (1664844.597 5651738.032) | -35.347317 | 7.087317 |
| 4 | 5.0 | -0.069303 | -13.214268 | 0.0 | 0.760956 | 1.230886 | 1.578735 | 1.256477 | 0 | 2100 | POINT (1664374.3334289133 5651537.103741099) | POINT (1664865.442 5651641.897) | -34.005075 | 2.165075 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 1882 | 1923.0 | 0.225420 | -28.613747 | 163.0 | 0.026739 | 1.182115 | 1.962700 | 1.400964 | 244 | 2100 | POINT (1904103.7288165868 5510835.704927893) | POINT (1904095.409 5511271.113) | 39.012195 | -64.512195 |
| 1883 | 1924.0 | 0.106620 | -16.956773 | 163.0 | 0.999680 | 0.052252 | 0.003835 | 0.061926 | 244 | 2100 | POINT (1904206.894713303 5510849.992895648) | POINT (1904195.024 5511295.976) | 15.029098 | -53.859098 |
| 1884 | 1925.0 | 0.026290 | -10.756660 | 163.0 | 0.986967 | 0.443378 | 0.276111 | 0.525462 | 244 | 2100 | POINT (1904310.650852537 5510853.672411901) | POINT (1904294.435 5511317.339) | -2.869718 | -36.050282 |
| 1885 | 1926.0 | -0.012694 | -2.926398 | 163.0 | 0.992207 | 0.402196 | 0.227201 | 0.476656 | 244 | 2100 | POINT (1904411.2744729214 5510858.670062316) | POINT (1904393.975 5511328.866) | -6.734550 | -29.485450 |
| 1886 | 1928.0 | -1.901856 | -199.126821 | 164.0 | 0.996629 | 0.849264 | 0.882216 | 0.939263 | 245 | 2100 | POINT (1787833.198058164 5517636.541552375) | POINT (1787650.860 5518259.530) | -769.683668 | 149.123668 |
1887 rows × 14 columns
In [12]:
poly = prediction_results_to_polygon(results)
output_shapefile = "Manuwatu_Test/weighted_projection_output.shp"
poly.to_file(output_shapefile, driver="ESRI Shapefile")
In [13]:
m = poly.explore(tiles="Esri.WorldImagery")
gpd.GeoDataFrame(results.drop(columns=["ocean_point", "linear_model_point"]), geometry=results.linear_model_point).explore(m=m)
gdf.explore("Year", legend=True, m=m)
Out[13]:
Make this Notebook Trusted to load map: File -> Trust Notebook